In [73]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.tree import plot_tree
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")
import plotly.io as pio
pio.renderers.default = 'notebook'
In [74]:
df = pd.read_csv('Dataset_for_Food_and_Beverages.csv')
df.head()
Out[74]:
| Shop_Id | Shop_Name | Shop_Location | Shop_Type | Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Nepal Cafe | Maharajgunj | Bistro | Yes | 5821831 | 423 | 96 | No | 4.3 |
| 1 | 2 | Everest Cake Shop | Baneshwor | Grill | Yes | 7381237 | 506 | 101 | No | 4.8 |
| 2 | 3 | Birat Grill | Bhaktapur | Restaurant | No | 9057127 | 174 | 251 | Yes | 5.0 |
| 3 | 4 | Birat Grocery | Bhaktapur | Grocery | Yes | 4254663 | 238 | 80 | Yes | 3.5 |
| 4 | 5 | Chitwan Grocery | Lalitpur | Grocery | No | 3122248 | 60 | 218 | Yes | 2.0 |
Data Cleaning¶
In [75]:
# Sorting
df = df.sort_values("Shop_Name")
df
Out[75]:
| Shop_Id | Shop_Name | Shop_Location | Shop_Type | Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|---|---|---|---|
| 117 | 118 | Annapurna Bakery | Jawalakhel | Bakery | Yes | 3436104 | 65 | 197 | Yes | 2.0 |
| 19 | 20 | Annapurna Bistro | Jawalakhel | Bistro | Yes | 8025449 | 506 | 139 | Yes | 4.8 |
| 169 | 170 | Annapurna Bistro | Maharajgunj | Bistro | Yes | 8324338 | 548 | 164 | No | 4.8 |
| 99 | 100 | Annapurna Cafe | Patan | Bistro | No | 5265481 | 347 | 71 | Yes | 3.0 |
| 71 | 72 | Annapurna Cake Shop | Patan | Bistro | No | 5761502 | 417 | 82 | Yes | 4.3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 123 | 124 | Pokhara Quick Stop | Jawalakhel | Bistro | Yes | 6288847 | 375 | 104 | No | 4.5 |
| 155 | 156 | Pokhara Restaurant | Bhaktapur | Restaurant | Yes | 8998621 | 248 | 270 | No | 5.0 |
| 5 | 6 | Pokhara Restaurant | Jawalakhel | Restaurant | No | 4148829 | 298 | 100 | Yes | 3.5 |
| 17 | 18 | Pokhara Supermarket | Lalitpur | Grocery | Yes | 3725943 | 286 | 92 | No | 3.0 |
| 167 | 168 | Pokhara Supermarket | Baneshwor | Grocery | No | 6674133 | 494 | 127 | No | 4.8 |
200 rows × 10 columns
In [76]:
df.isnull().sum()
Out[76]:
Shop_Id 0 Shop_Name 0 Shop_Location 0 Shop_Type 0 Shop_Website 0 Yearly_Sales 0 Average_Order_Value 0 Foot_Traffic 0 Marketing 0 Rating 0 dtype: int64
In [77]:
# Dropping Unnecessary Columns
df = df.drop("Shop_Id", axis=1)
df.head()
Out[77]:
| Shop_Name | Shop_Location | Shop_Type | Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|---|---|---|
| 117 | Annapurna Bakery | Jawalakhel | Bakery | Yes | 3436104 | 65 | 197 | Yes | 2.0 |
| 19 | Annapurna Bistro | Jawalakhel | Bistro | Yes | 8025449 | 506 | 139 | Yes | 4.8 |
| 169 | Annapurna Bistro | Maharajgunj | Bistro | Yes | 8324338 | 548 | 164 | No | 4.8 |
| 99 | Annapurna Cafe | Patan | Bistro | No | 5265481 | 347 | 71 | Yes | 3.0 |
| 71 | Annapurna Cake Shop | Patan | Bistro | No | 5761502 | 417 | 82 | Yes | 4.3 |
In [78]:
# Index Reset
df= df.reset_index(drop=True)
df
Out[78]:
| Shop_Name | Shop_Location | Shop_Type | Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Annapurna Bakery | Jawalakhel | Bakery | Yes | 3436104 | 65 | 197 | Yes | 2.0 |
| 1 | Annapurna Bistro | Jawalakhel | Bistro | Yes | 8025449 | 506 | 139 | Yes | 4.8 |
| 2 | Annapurna Bistro | Maharajgunj | Bistro | Yes | 8324338 | 548 | 164 | No | 4.8 |
| 3 | Annapurna Cafe | Patan | Bistro | No | 5265481 | 347 | 71 | Yes | 3.0 |
| 4 | Annapurna Cake Shop | Patan | Bistro | No | 5761502 | 417 | 82 | Yes | 4.3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 195 | Pokhara Quick Stop | Jawalakhel | Bistro | Yes | 6288847 | 375 | 104 | No | 4.5 |
| 196 | Pokhara Restaurant | Bhaktapur | Restaurant | Yes | 8998621 | 248 | 270 | No | 5.0 |
| 197 | Pokhara Restaurant | Jawalakhel | Restaurant | No | 4148829 | 298 | 100 | Yes | 3.5 |
| 198 | Pokhara Supermarket | Lalitpur | Grocery | Yes | 3725943 | 286 | 92 | No | 3.0 |
| 199 | Pokhara Supermarket | Baneshwor | Grocery | No | 6674133 | 494 | 127 | No | 4.8 |
200 rows × 9 columns
In [79]:
df['Shop_Type'].unique()
Out[79]:
array(['Bakery', 'Bistro', 'Café', 'Restaurant', 'Convenience Store',
'Grocery', 'Lounge', 'Supermarket', 'Essentials', 'Grill'],
dtype=object)
In [80]:
# Shop Type Mapping
df['Shop_Type'] = df['Shop_Type'].replace({
'Café': 'Cafe',
'Grocery': 'Grocery',
'Restaurant': 'Restaurant',
'Bistro': 'Bistro',
'Bakery': 'Bakery',
'Convenience Store': 'Convenience Store',
'Lounge': 'Lounge'
})
df['Shop_Type'] = df['Shop_Type'].str.title()
In [81]:
# Website Mapping (0,1)
df['Shop_Website'] = df['Shop_Website'].replace({
'Yes': 1,
'No': 0
})
df.head()
Out[81]:
| Shop_Name | Shop_Location | Shop_Type | Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Annapurna Bakery | Jawalakhel | Bakery | 1 | 3436104 | 65 | 197 | Yes | 2.0 |
| 1 | Annapurna Bistro | Jawalakhel | Bistro | 1 | 8025449 | 506 | 139 | Yes | 4.8 |
| 2 | Annapurna Bistro | Maharajgunj | Bistro | 1 | 8324338 | 548 | 164 | No | 4.8 |
| 3 | Annapurna Cafe | Patan | Bistro | 0 | 5265481 | 347 | 71 | Yes | 3.0 |
| 4 | Annapurna Cake Shop | Patan | Bistro | 0 | 5761502 | 417 | 82 | Yes | 4.3 |
In [82]:
# Marketing Column Mapping
marketing_map = {
'Yes': 1,
'No': 0,
}
df['Marketing'] = df['Marketing'].map(marketing_map)
df.head()
Out[82]:
| Shop_Name | Shop_Location | Shop_Type | Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Annapurna Bakery | Jawalakhel | Bakery | 1 | 3436104 | 65 | 197 | 1 | 2.0 |
| 1 | Annapurna Bistro | Jawalakhel | Bistro | 1 | 8025449 | 506 | 139 | 1 | 4.8 |
| 2 | Annapurna Bistro | Maharajgunj | Bistro | 1 | 8324338 | 548 | 164 | 0 | 4.8 |
| 3 | Annapurna Cafe | Patan | Bistro | 0 | 5265481 | 347 | 71 | 1 | 3.0 |
| 4 | Annapurna Cake Shop | Patan | Bistro | 0 | 5761502 | 417 | 82 | 1 | 4.3 |
In [83]:
# Ratings Category (Low, Medium, High)
def categorize_rating(rating):
if rating <= 2.5:
return 'Low'
elif rating <= 4.0:
return 'Medium'
else:
return 'High'
df['Rating_Category'] = df['Rating'].apply(categorize_rating)
df.head()
Out[83]:
| Shop_Name | Shop_Location | Shop_Type | Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | Rating_Category | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Annapurna Bakery | Jawalakhel | Bakery | 1 | 3436104 | 65 | 197 | 1 | 2.0 | Low |
| 1 | Annapurna Bistro | Jawalakhel | Bistro | 1 | 8025449 | 506 | 139 | 1 | 4.8 | High |
| 2 | Annapurna Bistro | Maharajgunj | Bistro | 1 | 8324338 | 548 | 164 | 0 | 4.8 | High |
| 3 | Annapurna Cafe | Patan | Bistro | 0 | 5265481 | 347 | 71 | 1 | 3.0 | Medium |
| 4 | Annapurna Cake Shop | Patan | Bistro | 0 | 5761502 | 417 | 82 | 1 | 4.3 | High |
Visualizations¶
In [84]:
# count the each shop type
shop_type_counts = df['Shop_Type'].value_counts().reset_index()
shop_type_counts.columns = ['Shop_Type', 'Count']
# pie
fig = px.pie(shop_type_counts, names='Shop_Type', values='Count', title='Shop Type Distribution')
fig.update_layout(
template='plotly_white',
width=800,
height=600
)
fig.show()
In [85]:
# visualization the shop type with most foot traffic
shop_type_foot_traffic = df.groupby('Shop_Type')['Foot_Traffic'].mean().reset_index()
shop_type_foot_traffic = shop_type_foot_traffic.sort_values(by='Foot_Traffic', ascending=False)
#histogram
fig = px.histogram(shop_type_foot_traffic, x='Shop_Type', y='Foot_Traffic', title='Shop Type vs Foot Traffic',color='Shop_Type')
fig.update_layout(
xaxis_title='Shop Type',
yaxis_title='Foot Traffic',
template='plotly_white',
width=800,
height=600
)
fig.show()
In [86]:
#visualization based on ratings and Shop Type
shop_type_rating = df.groupby('Shop_Type')['Rating'].mean().reset_index()
shop_type_rating = shop_type_rating.sort_values(by='Rating', ascending=False)
#line
fig = px.line(shop_type_rating, x='Shop_Type', y='Rating', title='Shop Type vs Rating')
fig.update_layout(
xaxis_title='Shop Type',
yaxis_title='Rating',
template='plotly_white',
width=800,
height=600
)
fig.update_xaxes(tickangle=45)
fig.show()
In [87]:
#visualization of marketing and yearly sales
marketing_sales = df.groupby('Marketing')['Yearly_Sales'].mean().reset_index()
marketing_sales = marketing_sales.sort_values(by='Yearly_Sales', ascending=False)
#bar
fig = px.bar(marketing_sales, x='Marketing', y='Yearly_Sales', title='Yearly Sales by Marketing', color='Yearly_Sales')
fig.update_layout(
xaxis_title='Marketing',
yaxis_title='Yearly Sales',
template='plotly_white',
width=800,
height=600
)
fig.show()
In [88]:
# Visualization of Shop Type and Yearly Sales
shop_type_sales = df.groupby('Shop_Type')['Yearly_Sales'].mean().reset_index()
shop_type_sales = shop_type_sales.sort_values(by='Yearly_Sales', ascending=False)
# bar
fig = px.bar(shop_type_sales, x='Shop_Type', y='Yearly_Sales', title='Yearly Sales by Shop Type', color='Yearly_Sales', color_continuous_scale='magma')
fig.update_layout(
xaxis_title='Shop Type',
yaxis_title='Yearly Sales',
template='plotly_white',
width=800,
height=600
)
fig.update_xaxes(tickangle=45)
fig.show()
In [89]:
# Average order value by shop website
shop_website_avg_order = df.groupby('Shop_Website')['Average_Order_Value'].mean().reset_index()
shop_website_avg_order = shop_website_avg_order.sort_values(by='Average_Order_Value', ascending=False)
# Bar
fig = px.bar(shop_website_avg_order, x='Shop_Website', y='Average_Order_Value', title='Average Order Value by Shop Website', color='Average_Order_Value', color_continuous_scale='temps')
fig.update_layout(
xaxis_title='Shop Website',
yaxis_title='Average Order Value',
template='plotly_white',
width=800,
height=600
)
fig.update_xaxes(tickangle=45)
fig.show()
In [90]:
# Shop location and foot traffic
shop_location_foot_traffic = df.groupby('Shop_Location')['Foot_Traffic'].mean().reset_index()
shop_location_foot_traffic.columns = ['Shop_Location', 'Average_Foot_Traffic']
# Scatter
fig = px.scatter(shop_location_foot_traffic, x='Shop_Location', y='Average_Foot_Traffic', title='Shop Location by Foot Traffic', color='Average_Foot_Traffic')
fig.update_layout(
xaxis_title='Shop Location',
yaxis_title='Average Foot Traffic',
template='plotly_white',
width=800,
height=600
)
fig.show()
In [91]:
# visualization of shop type and average order value
shop_type_avg_order = df.groupby('Shop_Type')['Average_Order_Value'].mean().reset_index()
shop_type_avg_order = shop_type_avg_order.sort_values(by='Average_Order_Value', ascending=False)
# Bar
fig = px.bar(shop_type_avg_order, x='Shop_Type', y='Average_Order_Value', title='Average Order Value by Shop Type', color='Average_Order_Value', color_continuous_scale='viridis')
fig.update_layout(
xaxis_title='Shop Type',
yaxis_title='Average Order Value',
template='plotly_white',
width=800,
height=600
)
fig.update_xaxes(tickangle=45)
fig.show()
In [92]:
#average sales by shop location
shop_location_sales = df.groupby('Shop_Location')['Yearly_Sales'].mean().reset_index()
shop_location_sales = shop_location_sales.sort_values(by='Yearly_Sales', ascending=False)
# hist
fig = px.histogram(shop_location_sales, x='Shop_Location', y='Yearly_Sales', title='Shop Location vs Yearly Sales', color='Yearly_Sales')
fig.update_layout(
xaxis_title='Shop Location',
yaxis_title='Yearly Sales',
template='plotly_white',
width=800,
height=600
)
fig.show()
In [93]:
# Average Order Value by Shop Location
shop_location_avg_order = df.groupby('Shop_Location')['Average_Order_Value'].mean().reset_index()
fig = px.pie(shop_location_avg_order, names='Shop_Location', values='Average_Order_Value', title='Average Order Value by Shop Location')
fig.update_layout(template='plotly_white', width=800, height=600)
fig.show()
Model Training¶
In [94]:
df.columns
Out[94]:
Index(['Shop_Name', 'Shop_Location', 'Shop_Type', 'Shop_Website',
'Yearly_Sales', 'Average_Order_Value', 'Foot_Traffic', 'Marketing',
'Rating', 'Rating_Category'],
dtype='object')
In [95]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 200 entries, 0 to 199 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Shop_Name 200 non-null object 1 Shop_Location 200 non-null object 2 Shop_Type 200 non-null object 3 Shop_Website 200 non-null int64 4 Yearly_Sales 200 non-null int64 5 Average_Order_Value 200 non-null int64 6 Foot_Traffic 200 non-null int64 7 Marketing 200 non-null int64 8 Rating 200 non-null float64 9 Rating_Category 200 non-null object dtypes: float64(1), int64(5), object(4) memory usage: 15.8+ KB
In [96]:
df.describe()
Out[96]:
| Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|
| count | 200.000000 | 2.000000e+02 | 200.000000 | 200.000000 | 200.00000 | 200.000000 |
| mean | 0.555000 | 6.160280e+06 | 320.485000 | 148.615000 | 0.50500 | 3.930000 |
| std | 0.498213 | 2.129283e+06 | 156.265107 | 63.782662 | 0.50123 | 1.076286 |
| min | 0.000000 | 2.530677e+06 | 55.000000 | 60.000000 | 0.00000 | 2.000000 |
| 25% | 0.000000 | 4.273359e+06 | 200.750000 | 96.000000 | 0.00000 | 3.000000 |
| 50% | 1.000000 | 6.309304e+06 | 289.500000 | 131.000000 | 1.00000 | 4.300000 |
| 75% | 1.000000 | 8.104099e+06 | 473.250000 | 207.000000 | 1.00000 | 4.800000 |
| max | 1.000000 | 9.489331e+06 | 599.000000 | 270.000000 | 1.00000 | 5.000000 |
In [97]:
corr = df.corr(numeric_only=1)
corr
Out[97]:
| Shop_Website | Yearly_Sales | Average_Order_Value | Foot_Traffic | Marketing | Rating | |
|---|---|---|---|---|---|---|
| Shop_Website | 1.000000 | 0.138697 | 0.082630 | 0.014507 | 0.059263 | 0.148724 |
| Yearly_Sales | 0.138697 | 1.000000 | 0.440359 | 0.407206 | 0.065382 | 0.935705 |
| Average_Order_Value | 0.082630 | 0.440359 | 1.000000 | -0.468402 | 0.003594 | 0.593349 |
| Foot_Traffic | 0.014507 | 0.407206 | -0.468402 | 1.000000 | 0.042893 | 0.193354 |
| Marketing | 0.059263 | 0.065382 | 0.003594 | 0.042893 | 1.000000 | 0.056542 |
| Rating | 0.148724 | 0.935705 | 0.593349 | 0.193354 | 0.056542 | 1.000000 |
In [98]:
# Heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='magma', linewidths=0.5)
plt.title('Correlation Heatmap')
plt.show()
Random forest Regressor¶
In [99]:
features_reg = df[
[
"Shop_Website",
"Marketing",
"Rating",
"Average_Order_Value",
"Foot_Traffic"
]
]
target_reg = df["Yearly_Sales"]
# Split the data into training and testing sets
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(features_reg, target_reg, test_size=0.2, random_state=42)
regressor = RandomForestRegressor()
regressor.fit(X_train_reg, y_train_reg)
y_pred_rge = regressor.predict(X_test_reg)
mae = mean_absolute_error(y_test_reg, y_pred_rge)
r2 = r2_score(y_test_reg, y_pred_rge)
print(f"Mean Absolute Error: {mae}")
print(f"R2 Score: {r2}")
Mean Absolute Error: 333324.6024999999 R2 Score: 0.9515501455559853
In [100]:
# Visualization of predicted vs actual costs
fig_cost = px.scatter(x=y_test_reg, y=y_pred_rge, labels={'x': 'Actual Cost', 'y': 'Predicted Cost'}, title='Actual vs Predicted Cost')
fig_cost.add_shape(
type="line", line=dict(dash='dash'),
x0=y_test_reg.min(), y0=y_test_reg.min(),
x1=y_test_reg.max(), y1=y_test_reg.max()
)
fig_cost.update_layout(paper_bgcolor="white")
fig_cost.show()
cost_list = [[predicted] for actual, predicted in zip(y_test_reg, y_pred_rge)]
In [101]:
#plot decision tree of the random forest regressor
plt.figure(figsize=(20, 10))
plot_tree(regressor.estimators_[0], filled=True, feature_names=features_reg.columns)
plt.show()
Random Forest Classifier¶
In [102]:
from sklearn.metrics import accuracy_score, classification_report
features_clf = df[
[
'Shop_Website',
'Yearly_Sales',
'Average_Order_Value',
'Foot_Traffic',
'Marketing'
]
]
target_clf = df['Rating_Category']
X_train, X_test, y_train, y_test = train_test_split(features_clf, target_clf, test_size=0.2, random_state=42)
model = RandomForestClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
In [103]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=['Low', 'Medium', 'High'])
print("Accuracy:", accuracy)
print("Classification Report:\n", report)
Accuracy: 0.95
Classification Report:
precision recall f1-score support
Low 0.92 1.00 0.96 23
Medium 1.00 1.00 1.00 2
High 1.00 0.87 0.93 15
accuracy 0.95 40
macro avg 0.97 0.96 0.96 40
weighted avg 0.95 0.95 0.95 40
In [104]:
import plotly.figure_factory as ff
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
class_names = ['Low', 'Medium', 'High']
fig_cm = ff.create_annotated_heatmap(
z=cm,
x=[f'Predicted {label}' for label in class_names],
y=[f'Actual {label}' for label in class_names],
colorscale='viridis'
)
fig_cm.update_layout(
title='Confusion Matrix',
xaxis=dict(title='Predicted Label'),
yaxis=dict(title='Actual Label'),
paper_bgcolor="white"
)
fig_cm.show()
cm
Out[104]:
array([[23, 0, 0],
[ 0, 2, 0],
[ 2, 0, 13]])
Hyperparameter tuning and cross validation¶
In [105]:
#hyperparameter tuning and cross validation for RandomForestRegressor
rfc = RandomForestRegressor(
random_state=42,
n_estimators=300,
max_depth=3,
min_samples_split=2,
min_samples_leaf=2,
)
rfc.fit(X_train_reg, y_train_reg)
y_pred_reg = rfc.predict(X_test_reg)
cv_scores_r2 = cross_val_score(rfc, features_reg, target_reg, cv=5, scoring='r2')
cv_scores_mae = cross_val_score(rfc, features_reg, target_reg, cv=5, scoring='neg_mean_absolute_error')
cv_scores_mae = -cv_scores_mae
print("Mean R2 score:", cv_scores_r2.mean())
print("Mean MAE score:", cv_scores_mae.mean())
Mean R2 score: 0.9632741545892657 Mean MAE score: 326892.12028971047
In [106]:
# actual and predicted values after hyperparameter tuning and cross validation
fig_cost = px.scatter(x=y_test_reg, y=y_pred_reg, labels={'x': 'Actual Cost', 'y': 'Predicted Cost'}, title='Actual vs Predicted Cost')
fig_cost.add_shape(
type="line", line=dict(dash='dash'),
x0=y_test_reg.min(), y0=y_test_reg.min(),
x1=y_test_reg.max(), y1=y_test_reg.max()
)
fig_cost.update_layout(paper_bgcolor="white")
fig_cost.show()
In [107]:
#hyperparameter tuning and cross validation for RandomForestClassifier
rfc = RandomForestClassifier(
random_state=42,
n_estimators=300,
max_depth=3,
min_samples_split=2,
min_samples_leaf=2,
)
rfc.fit(X_train, y_train)
y_pred_rfc = rfc.predict(X_test)
cv_scores_rfc = cross_val_score(rfc, features_clf, target_clf, cv=5, scoring='accuracy')
print("Cross-validation accuracy scores for Random Forest Classifier:", cv_scores_rfc)
print("Mean accuracy score:", cv_scores_rfc.mean())
Cross-validation accuracy scores for Random Forest Classifier: [0.95 0.9 1. 1. 1. ] Mean accuracy score: 0.97
In [108]:
# df.to_csv('cleaned_food_and_beverages.csv')
#to view the actual and predicted values
# for actual, predicted in zip(y_test_reg, y_pred_gbr):
# print(f"Actual: {actual}, Predicted: {int(predicted)}")